import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn import metrics
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
# Read the case-study workbook into a DataFrame and preview its first two rows.
workbook_path = "CaseStudy_Cancer.xls"
df = pd.read_excel(workbook_path)
df.head(n=2)
| ID | B-M | radius | texture | perimeter | area | smoothness | compactness | concavity | concave points | ... | radius-W | texture-W | perimeter-W | area-W | smoothness-W | compactness-W | concavity-W | concave points-W | Symmetry-W | fractal dimension-W | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 842302 | M | 17.99 | 10.38 | 122.8 | 1001.0 | 0.11840 | 0.27760 | 0.3001 | 0.14710 | ... | 25.38 | 17.33 | 184.6 | 2019.0 | 0.1622 | 0.6656 | 0.7119 | 0.2654 | 0.4601 | 0.11890 |
| 1 | 842517 | M | 20.57 | 17.77 | 132.9 | 1326.0 | 0.08474 | 0.07864 | 0.0869 | 0.07017 | ... | 24.99 | 23.41 | 158.8 | 1956.0 | 0.1238 | 0.1866 | 0.2416 | 0.1860 | 0.2750 | 0.08902 |
2 rows × 32 columns
# Dimensions of the frame as a (rows, columns) tuple
df.shape
(569, 32)
# Visual check for missing values: draw the boolean null mask as a heatmap.
null_mask = df.isnull()
sns.heatmap(null_mask, cmap='viridis', cbar=False, yticklabels=False)
<matplotlib.axes._subplots.AxesSubplot at 0x14e15550>
# List every row that has a missing value in at least one column.
has_missing = df.isnull().any(axis=1)
df[has_missing]
| ID | B-M | radius | texture | perimeter | area | smoothness | compactness | concavity | concave points | ... | radius-W | texture-W | perimeter-W | area-W | smoothness-W | compactness-W | concavity-W | concave points-W | Symmetry-W | fractal dimension-W |
|---|
0 rows × 32 columns
# Drop the ID sequence, as that is just an arbitrary number and not a feature.
# errors='ignore' keeps this cell safe to re-run once the column is gone
# (without it a second run raises KeyError).
df.drop('ID', axis=1, inplace=True, errors='ignore')
df.dtypes
B-M object radius float64 texture float64 perimeter float64 area float64 smoothness float64 compactness float64 concavity float64 concave points float64 Symmetry float64 fractal dimension float64 SE-radius float64 texture-SE float64 perimeter-SE float64 area-SE float64 smoothness-SE float64 compactness-SE float64 concavity-SE float64 concave points-SE float64 Symmetry-SE float64 fractal dimension-SE float64 radius-W float64 texture-W float64 perimeter-W float64 area-W float64 smoothness-W float64 compactness-W float64 concavity-W float64 concave points-W float64 Symmetry-W float64 fractal dimension-W float64 dtype: object
# Heat map for correlation, to quantify the relation between the variables.
# 'B-M' holds the strings 'M'/'B', so it is encoded as M=1 / B=0 on a copy:
# that keeps the diagnosis in the correlation table (pandas >= 2.0 raises on
# non-numeric columns) while df itself keeps the labels the later plots
# filter on. Values that are not 'M'/'B' are passed through unchanged.
cleanup_nums = {"M": 1, "B": 0}
encoded = df.assign(**{"B-M": df["B-M"].map(lambda v: cleanup_nums.get(v, v))})
# calculate the correlation matrix
corr = encoded.corr()
cmap = sns.diverging_palette(5, 250, as_cmap=True)
# draw the correlation table
def magnify():
    """Return the CSS table styles used to enlarge table cells on hover.

    Returns:
        list[dict]: one entry per CSS rule, each with a ``selector`` string
        and a ``props`` list of ``(property, value)`` pairs, in the form
        handed to ``Styler.set_table_styles``.
    """
    return [
        # header cells: small font by default
        dict(selector="th",
             props=[("font-size", "7pt")]),
        # data cells: no padding
        dict(selector="td",
             props=[('padding', "0em 0em")]),
        # hovered header cell: larger font
        dict(selector="th:hover",
             props=[("font-size", "12pt")]),
        # hovered data cell: wider and larger font
        dict(selector="tr:hover td:hover",
             props=[('max-width', '200px'),
                    ('font-size', '12pt')]),
    ]
# Render the correlation matrix as a colour-graded table with hover zoom.
# Styler.set_precision was removed in pandas 2.0; the explicit "{:.2g}"
# formatter shows two significant digits and is accepted by Styler.format
# on both old and new pandas versions.
(
    corr.style.background_gradient(cmap, axis=1)
    .set_properties(**{'max-width': '80px', 'font-size': '10pt'})
    .set_caption("Hover to magnify")
    .format("{:.2g}")
    .set_table_styles(magnify())
)
| B-M | radius | texture | perimeter | area | smoothness | compactness | concavity | concave points | Symmetry | fractal dimension | SE-radius | texture-SE | perimeter-SE | area-SE | smoothness-SE | compactness-SE | concavity-SE | concave points-SE | Symmetry-SE | fractal dimension-SE | radius-W | texture-W | perimeter-W | area-W | smoothness-W | compactness-W | concavity-W | concave points-W | Symmetry-W | fractal dimension-W | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| B-M | 1 | 0.73 | 0.42 | 0.74 | 0.71 | 0.36 | 0.6 | 0.7 | 0.78 | 0.33 | -0.013 | 0.57 | -0.0083 | 0.56 | 0.55 | -0.067 | 0.29 | 0.25 | 0.41 | -0.0065 | 0.078 | 0.78 | 0.46 | 0.78 | 0.73 | 0.42 | 0.59 | 0.66 | 0.79 | 0.42 | 0.32 |
| radius | 0.73 | 1 | 0.32 | 1 | 0.99 | 0.17 | 0.51 | 0.68 | 0.82 | 0.15 | -0.31 | 0.68 | -0.097 | 0.67 | 0.74 | -0.22 | 0.21 | 0.19 | 0.38 | -0.1 | -0.043 | 0.97 | 0.3 | 0.97 | 0.94 | 0.12 | 0.41 | 0.53 | 0.74 | 0.16 | 0.0071 |
| texture | 0.42 | 0.32 | 1 | 0.33 | 0.32 | -0.023 | 0.24 | 0.3 | 0.29 | 0.071 | -0.076 | 0.28 | 0.39 | 0.28 | 0.26 | 0.0066 | 0.19 | 0.14 | 0.16 | 0.0091 | 0.054 | 0.35 | 0.91 | 0.36 | 0.34 | 0.078 | 0.28 | 0.3 | 0.3 | 0.11 | 0.12 |
| perimeter | 0.74 | 1 | 0.33 | 1 | 0.99 | 0.21 | 0.56 | 0.72 | 0.85 | 0.18 | -0.26 | 0.69 | -0.087 | 0.69 | 0.74 | -0.2 | 0.25 | 0.23 | 0.41 | -0.082 | -0.0055 | 0.97 | 0.3 | 0.97 | 0.94 | 0.15 | 0.46 | 0.56 | 0.77 | 0.19 | 0.051 |
| area | 0.71 | 0.99 | 0.32 | 0.99 | 1 | 0.18 | 0.5 | 0.69 | 0.82 | 0.15 | -0.28 | 0.73 | -0.066 | 0.73 | 0.8 | -0.17 | 0.21 | 0.21 | 0.37 | -0.072 | -0.02 | 0.96 | 0.29 | 0.96 | 0.96 | 0.12 | 0.39 | 0.51 | 0.72 | 0.14 | 0.0037 |
| smoothness | 0.36 | 0.17 | -0.023 | 0.21 | 0.18 | 1 | 0.66 | 0.52 | 0.55 | 0.56 | 0.58 | 0.3 | 0.068 | 0.3 | 0.25 | 0.33 | 0.32 | 0.25 | 0.38 | 0.2 | 0.28 | 0.21 | 0.036 | 0.24 | 0.21 | 0.81 | 0.47 | 0.43 | 0.5 | 0.39 | 0.5 |
| compactness | 0.6 | 0.51 | 0.24 | 0.56 | 0.5 | 0.66 | 1 | 0.88 | 0.83 | 0.6 | 0.57 | 0.5 | 0.046 | 0.55 | 0.46 | 0.14 | 0.74 | 0.57 | 0.64 | 0.23 | 0.51 | 0.54 | 0.25 | 0.59 | 0.51 | 0.57 | 0.87 | 0.82 | 0.82 | 0.51 | 0.69 |
| concavity | 0.7 | 0.68 | 0.3 | 0.72 | 0.69 | 0.52 | 0.88 | 1 | 0.92 | 0.5 | 0.34 | 0.63 | 0.076 | 0.66 | 0.62 | 0.099 | 0.67 | 0.69 | 0.68 | 0.18 | 0.45 | 0.69 | 0.3 | 0.73 | 0.68 | 0.45 | 0.75 | 0.88 | 0.86 | 0.41 | 0.51 |
| concave points | 0.78 | 0.82 | 0.29 | 0.85 | 0.82 | 0.55 | 0.83 | 0.92 | 1 | 0.46 | 0.17 | 0.7 | 0.021 | 0.71 | 0.69 | 0.028 | 0.49 | 0.44 | 0.62 | 0.095 | 0.26 | 0.83 | 0.29 | 0.86 | 0.81 | 0.45 | 0.67 | 0.75 | 0.91 | 0.38 | 0.37 |
| Symmetry | 0.33 | 0.15 | 0.071 | 0.18 | 0.15 | 0.56 | 0.6 | 0.5 | 0.46 | 1 | 0.48 | 0.3 | 0.13 | 0.31 | 0.22 | 0.19 | 0.42 | 0.34 | 0.39 | 0.45 | 0.33 | 0.19 | 0.091 | 0.22 | 0.18 | 0.43 | 0.47 | 0.43 | 0.43 | 0.7 | 0.44 |
| fractal dimension | -0.013 | -0.31 | -0.076 | -0.26 | -0.28 | 0.58 | 0.57 | 0.34 | 0.17 | 0.48 | 1 | 0.00011 | 0.16 | 0.04 | -0.09 | 0.4 | 0.56 | 0.45 | 0.34 | 0.35 | 0.69 | -0.25 | -0.051 | -0.21 | -0.23 | 0.5 | 0.46 | 0.35 | 0.18 | 0.33 | 0.77 |
| SE-radius | 0.57 | 0.68 | 0.28 | 0.69 | 0.73 | 0.3 | 0.5 | 0.63 | 0.7 | 0.3 | 0.00011 | 1 | 0.21 | 0.97 | 0.95 | 0.16 | 0.36 | 0.33 | 0.51 | 0.24 | 0.23 | 0.72 | 0.19 | 0.72 | 0.75 | 0.14 | 0.29 | 0.38 | 0.53 | 0.095 | 0.05 |
| texture-SE | -0.0083 | -0.097 | 0.39 | -0.087 | -0.066 | 0.068 | 0.046 | 0.076 | 0.021 | 0.13 | 0.16 | 0.21 | 1 | 0.22 | 0.11 | 0.4 | 0.23 | 0.19 | 0.23 | 0.41 | 0.28 | -0.11 | 0.41 | -0.1 | -0.083 | -0.074 | -0.092 | -0.069 | -0.12 | -0.13 | -0.046 |
| perimeter-SE | 0.56 | 0.67 | 0.28 | 0.69 | 0.73 | 0.3 | 0.55 | 0.66 | 0.71 | 0.31 | 0.04 | 0.97 | 0.22 | 1 | 0.94 | 0.15 | 0.42 | 0.36 | 0.56 | 0.27 | 0.24 | 0.7 | 0.2 | 0.72 | 0.73 | 0.13 | 0.34 | 0.42 | 0.55 | 0.11 | 0.085 |
| area-SE | 0.55 | 0.74 | 0.26 | 0.74 | 0.8 | 0.25 | 0.46 | 0.62 | 0.69 | 0.22 | -0.09 | 0.95 | 0.11 | 0.94 | 1 | 0.075 | 0.28 | 0.27 | 0.42 | 0.13 | 0.13 | 0.76 | 0.2 | 0.76 | 0.81 | 0.13 | 0.28 | 0.39 | 0.54 | 0.074 | 0.018 |
| smoothness-SE | -0.067 | -0.22 | 0.0066 | -0.2 | -0.17 | 0.33 | 0.14 | 0.099 | 0.028 | 0.19 | 0.4 | 0.16 | 0.4 | 0.15 | 0.075 | 1 | 0.34 | 0.27 | 0.33 | 0.41 | 0.43 | -0.23 | -0.075 | -0.22 | -0.18 | 0.31 | -0.056 | -0.058 | -0.1 | -0.11 | 0.1 |
| compactness-SE | 0.29 | 0.21 | 0.19 | 0.25 | 0.21 | 0.32 | 0.74 | 0.67 | 0.49 | 0.42 | 0.56 | 0.36 | 0.23 | 0.42 | 0.28 | 0.34 | 1 | 0.8 | 0.74 | 0.39 | 0.8 | 0.2 | 0.14 | 0.26 | 0.2 | 0.23 | 0.68 | 0.64 | 0.48 | 0.28 | 0.59 |
| concavity-SE | 0.25 | 0.19 | 0.14 | 0.23 | 0.21 | 0.25 | 0.57 | 0.69 | 0.44 | 0.34 | 0.45 | 0.33 | 0.19 | 0.36 | 0.27 | 0.27 | 0.8 | 1 | 0.77 | 0.31 | 0.73 | 0.19 | 0.1 | 0.23 | 0.19 | 0.17 | 0.48 | 0.66 | 0.44 | 0.2 | 0.44 |
| concave points-SE | 0.41 | 0.38 | 0.16 | 0.41 | 0.37 | 0.38 | 0.64 | 0.68 | 0.62 | 0.39 | 0.34 | 0.51 | 0.23 | 0.56 | 0.42 | 0.33 | 0.74 | 0.77 | 1 | 0.31 | 0.61 | 0.36 | 0.087 | 0.39 | 0.34 | 0.22 | 0.45 | 0.55 | 0.6 | 0.14 | 0.31 |
| Symmetry-SE | -0.0065 | -0.1 | 0.0091 | -0.082 | -0.072 | 0.2 | 0.23 | 0.18 | 0.095 | 0.45 | 0.35 | 0.24 | 0.41 | 0.27 | 0.13 | 0.41 | 0.39 | 0.31 | 0.31 | 1 | 0.37 | -0.13 | -0.077 | -0.1 | -0.11 | -0.013 | 0.06 | 0.037 | -0.03 | 0.39 | 0.078 |
| fractal dimension-SE | 0.078 | -0.043 | 0.054 | -0.0055 | -0.02 | 0.28 | 0.51 | 0.45 | 0.26 | 0.33 | 0.69 | 0.23 | 0.28 | 0.24 | 0.13 | 0.43 | 0.8 | 0.73 | 0.61 | 0.37 | 1 | -0.037 | -0.0032 | -0.001 | -0.023 | 0.17 | 0.39 | 0.38 | 0.22 | 0.11 | 0.59 |
| radius-W | 0.78 | 0.97 | 0.35 | 0.97 | 0.96 | 0.21 | 0.54 | 0.69 | 0.83 | 0.19 | -0.25 | 0.72 | -0.11 | 0.7 | 0.76 | -0.23 | 0.2 | 0.19 | 0.36 | -0.13 | -0.037 | 1 | 0.36 | 0.99 | 0.98 | 0.22 | 0.48 | 0.57 | 0.79 | 0.24 | 0.093 |
| texture-W | 0.46 | 0.3 | 0.91 | 0.3 | 0.29 | 0.036 | 0.25 | 0.3 | 0.29 | 0.091 | -0.051 | 0.19 | 0.41 | 0.2 | 0.2 | -0.075 | 0.14 | 0.1 | 0.087 | -0.077 | -0.0032 | 0.36 | 1 | 0.37 | 0.35 | 0.23 | 0.36 | 0.37 | 0.36 | 0.23 | 0.22 |
| perimeter-W | 0.78 | 0.97 | 0.36 | 0.97 | 0.96 | 0.24 | 0.59 | 0.73 | 0.86 | 0.22 | -0.21 | 0.72 | -0.1 | 0.72 | 0.76 | -0.22 | 0.26 | 0.23 | 0.39 | -0.1 | -0.001 | 0.99 | 0.37 | 1 | 0.98 | 0.24 | 0.53 | 0.62 | 0.82 | 0.27 | 0.14 |
| area-W | 0.73 | 0.94 | 0.34 | 0.94 | 0.96 | 0.21 | 0.51 | 0.68 | 0.81 | 0.18 | -0.23 | 0.75 | -0.083 | 0.73 | 0.81 | -0.18 | 0.2 | 0.19 | 0.34 | -0.11 | -0.023 | 0.98 | 0.35 | 0.98 | 1 | 0.21 | 0.44 | 0.54 | 0.75 | 0.21 | 0.08 |
| smoothness-W | 0.42 | 0.12 | 0.078 | 0.15 | 0.12 | 0.81 | 0.57 | 0.45 | 0.45 | 0.43 | 0.5 | 0.14 | -0.074 | 0.13 | 0.13 | 0.31 | 0.23 | 0.17 | 0.22 | -0.013 | 0.17 | 0.22 | 0.23 | 0.24 | 0.21 | 1 | 0.57 | 0.52 | 0.55 | 0.49 | 0.62 |
| compactness-W | 0.59 | 0.41 | 0.28 | 0.46 | 0.39 | 0.47 | 0.87 | 0.75 | 0.67 | 0.47 | 0.46 | 0.29 | -0.092 | 0.34 | 0.28 | -0.056 | 0.68 | 0.48 | 0.45 | 0.06 | 0.39 | 0.48 | 0.36 | 0.53 | 0.44 | 0.57 | 1 | 0.89 | 0.8 | 0.61 | 0.81 |
| concavity-W | 0.66 | 0.53 | 0.3 | 0.56 | 0.51 | 0.43 | 0.82 | 0.88 | 0.75 | 0.43 | 0.35 | 0.38 | -0.069 | 0.42 | 0.39 | -0.058 | 0.64 | 0.66 | 0.55 | 0.037 | 0.38 | 0.57 | 0.37 | 0.62 | 0.54 | 0.52 | 0.89 | 1 | 0.86 | 0.53 | 0.69 |
| concave points-W | 0.79 | 0.74 | 0.3 | 0.77 | 0.72 | 0.5 | 0.82 | 0.86 | 0.91 | 0.43 | 0.18 | 0.53 | -0.12 | 0.55 | 0.54 | -0.1 | 0.48 | 0.44 | 0.6 | -0.03 | 0.22 | 0.79 | 0.36 | 0.82 | 0.75 | 0.55 | 0.8 | 0.86 | 1 | 0.5 | 0.51 |
| Symmetry-W | 0.42 | 0.16 | 0.11 | 0.19 | 0.14 | 0.39 | 0.51 | 0.41 | 0.38 | 0.7 | 0.33 | 0.095 | -0.13 | 0.11 | 0.074 | -0.11 | 0.28 | 0.2 | 0.14 | 0.39 | 0.11 | 0.24 | 0.23 | 0.27 | 0.21 | 0.49 | 0.61 | 0.53 | 0.5 | 1 | 0.54 |
| fractal dimension-W | 0.32 | 0.0071 | 0.12 | 0.051 | 0.0037 | 0.5 | 0.69 | 0.51 | 0.37 | 0.44 | 0.77 | 0.05 | -0.046 | 0.085 | 0.018 | 0.1 | 0.59 | 0.44 | 0.31 | 0.078 | 0.59 | 0.093 | 0.22 | 0.14 | 0.08 | 0.62 | 0.81 | 0.69 | 0.51 | 0.54 | 1 |
# Bar chart of how many records carry each diagnosis label.
diagnosis_counts = df['B-M'].value_counts()
diagnosis_counts.plot(kind='bar', color='purple')
plt.xlabel('Diagnosis type')
plt.ylabel('Diagnosis counts')
plt.title("Diagnosis Details");
#Univariate graphs for each attribute grouped by class variable
fig = plt.figure()
fig.set_figheight(5)
fig.set_figwidth(15)
num_bins = 10
# One panel per attribute, filled left-to-right on a 3x3 grid.
features = ['radius', 'texture', 'perimeter', 'area', 'smoothness',
            'compactness', 'concavity', 'concave points', 'Symmetry']
for position, feature in enumerate(features, start=1):
    ax1 = fig.add_subplot(3, 3, position)
    # Overlay both diagnosis classes as raw counts (the hist default).
    # The former normed=0 argument is omitted because Matplotlib 3.1 removed
    # it, and the legend now shows the actual 'B-M' value of each class
    # instead of the ambiguous "1"/"0".
    for diagnosis, colour in (('B', 'blue'), ('M', 'red')):
        values = df.loc[df['B-M'] == diagnosis, feature].values
        ax1.hist(values, num_bins, facecolor=colour, alpha=0.5, label=diagnosis)
    ax1.legend(loc='upper right')
    ax1.set_title(feature)
plt.tight_layout()
plt.show()
#Univariate graphs for each attribute grouped by class variable
fig = plt.figure()
fig.set_figheight(5)
fig.set_figwidth(15)
num_bins = 10
# One panel per attribute, filled left-to-right on a 3x3 grid.
features = ['fractal dimension', 'SE-radius', 'texture-SE', 'perimeter-SE',
            'area-SE', 'smoothness-SE', 'compactness-SE', 'concavity-SE',
            'concave points-SE']
for position, feature in enumerate(features, start=1):
    ax1 = fig.add_subplot(3, 3, position)
    # Overlay both diagnosis classes as raw counts (the hist default).
    # The former normed=0 argument is omitted because Matplotlib 3.1 removed
    # it, and the legend now shows the actual 'B-M' value of each class
    # instead of the ambiguous "1"/"0".
    for diagnosis, colour in (('B', 'blue'), ('M', 'red')):
        values = df.loc[df['B-M'] == diagnosis, feature].values
        ax1.hist(values, num_bins, facecolor=colour, alpha=0.5, label=diagnosis)
    ax1.legend(loc='upper right')
    ax1.set_title(feature)
plt.tight_layout()
plt.show()
#Univariate graphs for each attribute grouped by class variable
fig = plt.figure()
fig.set_figheight(5)
fig.set_figwidth(15)
num_bins = 10
# One panel per attribute, filled left-to-right on a 3x3 grid.
features = ['Symmetry-SE', 'fractal dimension-SE', 'perimeter-W', 'radius-W',
            'texture-W', 'area-W', 'smoothness-W', 'compactness-W',
            'concavity-W']
for position, feature in enumerate(features, start=1):
    ax1 = fig.add_subplot(3, 3, position)
    # Overlay both diagnosis classes as raw counts (the hist default).
    # The former normed=0 argument is omitted because Matplotlib 3.1 removed
    # it, and the legend now shows the actual 'B-M' value of each class
    # instead of the ambiguous "1"/"0".
    for diagnosis, colour in (('B', 'blue'), ('M', 'red')):
        values = df.loc[df['B-M'] == diagnosis, feature].values
        ax1.hist(values, num_bins, facecolor=colour, alpha=0.5, label=diagnosis)
    ax1.legend(loc='upper right')
    ax1.set_title(feature)
plt.tight_layout()
plt.show()
#Univariate graphs for each attribute grouped by class variable
fig = plt.figure()
fig.set_figheight(5)
fig.set_figwidth(15)
num_bins = 10
# The remaining three attributes fill the first row of the same 3x3 grid.
features = ['concave points-W', 'Symmetry-W', 'fractal dimension-W']
for position, feature in enumerate(features, start=1):
    ax1 = fig.add_subplot(3, 3, position)
    # Overlay both diagnosis classes as raw counts (the hist default).
    # The former normed=0 argument is omitted because Matplotlib 3.1 removed
    # it, and the legend now shows the actual 'B-M' value of each class
    # instead of the ambiguous "1"/"0".
    for diagnosis, colour in (('B', 'blue'), ('M', 'red')):
        values = df.loc[df['B-M'] == diagnosis, feature].values
        ax1.hist(values, num_bins, facecolor=colour, alpha=0.5, label=diagnosis)
    ax1.legend(loc='upper right')
    ax1.set_title(feature)
plt.tight_layout()
plt.show()
# Grid of pairwise relationships between the columns of df, coloured by 'B-M'.
sns.pairplot(data=df, hue='B-M', palette='Set1')
<seaborn.axisgrid.PairGrid at 0x1749a5f8>